Code
# data from https://www.kaggle.com/datasets/noaa/hurricane-database
#
# Load the best-track table and reduce it to one storm (ID "AL062006"):
# parsed dates, signed decimal coordinates, float wind speed, and a 1-based
# step index "k" that numbers the storm's fixes in chronological order.
df = (
    pl.read_csv("./atlantic.csv")
    # Filter first so the per-row Python lambdas below only touch one storm.
    .filter(pl.col("ID") == "AL062006")
    .with_columns(
        # Dates are stored as YYYYMMDD; parse that layout directly.
        pl.col("Date").cast(str).str.strptime(pl.Date, format="%Y%m%d"),
        # Coordinates end in a hemisphere letter: latitudes not marked "N"
        # and longitudes marked "W" become negative.
        pl.col("Latitude").apply(
            lambda s: float(s[:-1]) if s[-1] == "N" else -float(s[:-1])
        ),
        pl.col("Longitude").apply(
            lambda s: -float(s[:-1]) if s[-1] == "W" else float(s[:-1])
        ),
    )
    .with_columns(pl.col("Maximum Wind").cast(float))
    .sort("Date", "Time")
    .with_columns(
        pl.struct("Date", "Time")
        # Sortable "date-hour" key (presumably Time is HHMM, so // 100 keeps
        # the hour and drops the minutes).
        .apply(lambda s: f"""{s["Date"]}-{s["Time"]//100:02}-00""")
        # Ordinal ranks stay unique whole numbers even when two fixes share
        # a key; the default average rank would yield x.5 values that the
        # integer cast truncates into duplicate / skipped steps.
        .rank(method="ordinal")
        .cast(int)
        .alias("k")
    )
    .select("Longitude", "Latitude", "ID", "k", "Maximum Wind")
)
Code
# Storm track on a world map.  Clicking a legend entry keeps that storm
# opaque and fades the rest.  The legend is produced by the "ID" colour
# encoding, so the legend-bound selection must be projected over "ID";
# df has no "Year" column to select on.
legend_selection = alt.selection_point(fields=["ID"], bind="legend")
storms = (
    alt.Chart(df.to_pandas())
    .encode(
        longitude=alt.Longitude("Longitude:Q"),
        latitude=alt.Latitude("Latitude:Q"),
        color=alt.Color("ID:N"),
        detail=alt.Detail("ID:N"),
        opacity=alt.condition(legend_selection, alt.value(1.0), alt.value(0.1)),
    )
    .add_params(legend_selection)  # of course we could also skip interactivity
)
# Country outlines drawn underneath the track.
map_background = alt.Chart(
    alt.topo_feature(data.world_110m.url, feature="countries")
).mark_geoshape(stroke="white", strokeWidth=2, color="lightgray")
# The same encoding is rendered twice: once as a line, once as filled points.
chart = (
    alt.layer(map_background, storms.mark_line(), storms.mark_point(filled=True))
    .project(translate=[1000, 600], scale=500)
    .properties(width=700, height=700)
)
chart
Code
import numpy as np

sigma1 = 0.5  # scale applied to the standard-normal noise drawn per step
m = 10  # NOTE(review): not referenced by the code visible here
n_samples = 100  # simulated tracks generated for every step k


def _simulate_track(track, k_now, sigma):
    """Return one noisy copy of *track* as seen from step *k_now*.

    Rows with ``k <= k_now`` keep their recorded position and are flagged
    ``is_measured``.  Each later row gets the running sum of independent
    ``sigma``-scaled standard-normal draws added to its recorded latitude
    and longitude.  The ``k`` column of the result is overwritten with
    *k_now*, so it identifies the step the simulation starts from rather
    than the row's own position in the track.
    """
    n_rows = len(track)
    is_known = pl.col("k") <= k_now
    return (
        track.with_columns(is_known.alias("is_measured"))
        .with_columns(
            pl.Series(name="lat_noise", values=sigma * np.random.randn(n_rows)),
            pl.Series(name="lon_noise", values=sigma * np.random.randn(n_rows)),
        )
        .with_columns(
            # No noise on the known part of the track.  Each column is
            # masked under its own name so the stored noise matches the
            # coordinate it is added to.
            pl.when(is_known)
            .then(pl.lit(0))
            .otherwise(pl.col("lat_noise"))
            .alias("lat_noise"),
            pl.when(is_known)
            .then(pl.lit(0))
            .otherwise(pl.col("lon_noise"))
            .alias("lon_noise"),
        )
        .with_columns(
            # Cumulative sum: the offset accumulates from step to step.
            (pl.col("Latitude") + pl.col("lat_noise").cumsum()).alias("Latitude"),
            (pl.col("Longitude") + pl.col("lon_noise").cumsum()).alias("Longitude"),
        )
        .with_columns(pl.lit(k_now).cast(pl.Int64).alias("k"))
    )


# Loop-invariant list of steps; sorted explicitly so the row order of the
# result does not depend on the order unique() happens to return.
k_values = df["k"].unique().sort()

# For every sample and every step k, one full-length noisy track.
dfuq = pl.concat(
    [
        pl.concat(
            [_simulate_track(df, k_now, sigma1) for k_now in k_values]
        ).with_columns(pl.lit(sample).alias("sample"))
        for sample in range(n_samples)
    ]
)
dfuq
shape: (220_900, 9)
| Longitude | Latitude | ID | k | Maximum Wind | is_measured | lat_noise | lon_noise | sample |
|---|---|---|---|---|---|---|---|---|
| f64 | f64 | str | i64 | f64 | bool | f64 | f64 | i32 |
| -61.6 | 12.7 | "AL062006" | 1 | 30.0 | false | 0.0 | 0.0 | 0 |
| -63.957082 | 12.665948 | "AL062006" | 1 | 30.0 | false | -0.334052 | -0.957082 | 0 |
| -65.48347 | 13.390762 | "AL062006" | 1 | 30.0 | false | 0.424814 | -0.126388 | 0 |
| -68.651723 | 13.738085 | "AL062006" | 1 | 35.0 | false | -0.052677 | -1.768253 | 0 |
| -69.403604 | 13.749999 | "AL062006" | 1 | 35.0 | false | -0.288086 | 0.548119 | 0 |
| -70.230243 | 14.975154 | "AL062006" | 1 | 40.0 | false | 0.925154 | 0.373361 | 0 |
| -71.554149 | 14.512496 | "AL062006" | 1 | 45.0 | false | -0.762658 | -0.123905 | 0 |
| -72.682457 | 15.166555 | "AL062006" | 1 | 50.0 | false | 0.154059 | -0.228308 | 0 |
| -72.682939 | 15.426643 | "AL062006" | 1 | 55.0 | false | -0.339911 | 0.799518 | 0 |
| -73.916605 | 16.573321 | "AL062006" | 1 | 55.0 | false | 0.546677 | -0.433666 | 0 |
| -74.638173 | 16.161596 | "AL062006" | 1 | 65.0 | false | -1.011725 | -0.021567 | 0 |
| -75.006931 | 16.218394 | "AL062006" | 1 | 55.0 | false | -0.443202 | 0.331241 | 0 |
| … | … | … | … | … | … | … | … | … |
| -77.6 | 35.8 | "AL062006" | 47 | 30.0 | true | 0.0 | 0.0 | 99 |
| -77.2 | 36.6 | "AL062006" | 47 | 40.0 | true | 0.0 | 0.0 | 99 |
| -77.0 | 37.1 | "AL062006" | 47 | 40.0 | true | 0.0 | 0.0 | 99 |
| -76.8 | 37.6 | "AL062006" | 47 | 40.0 | true | 0.0 | 0.0 | 99 |
| -76.7 | 38.2 | "AL062006" | 47 | 40.0 | true | 0.0 | 0.0 | 99 |
| -76.7 | 38.9 | "AL062006" | 47 | 40.0 | true | 0.0 | 0.0 | 99 |
| -76.7 | 39.9 | "AL062006" | 47 | 35.0 | true | 0.0 | 0.0 | 99 |
| -77.1 | 41.3 | "AL062006" | 47 | 25.0 | true | 0.0 | 0.0 | 99 |
| -77.5 | 43.1 | "AL062006" | 47 | 20.0 | true | 0.0 | 0.0 | 99 |
| -77.0 | 44.5 | "AL062006" | 47 | 20.0 | true | 0.0 | 0.0 | 99 |
| -75.8 | 45.6 | "AL062006" | 47 | 20.0 | true | 0.0 | 0.0 | 99 |
| -74.4 | 46.5 | "AL062006" | 47 | 20.0 | false | 0.0 | 0.0 | 99 |
Code
# dfuq is far larger than Altair's default row limit, so lift the limit.
alt.data_transformers.disable_max_rows()
# NOTE(review): this selection is never attached to a chart in this cell.
legend_selection = alt.selection_point(fields=["k"], bind="legend")
# Slider over the step index k.  The upper bound follows the data so that
# every slider position has tracks to show.
select_year = alt.selection_point(
    name="k",
    fields=["k"],
    bind=alt.binding_range(
        min=1, max=int(dfuq["k"].max()), step=1, name="Progress"
    ),
    value=1,
)
storms = (
    alt.Chart(dfuq.to_pandas())
    .encode(
        longitude=alt.Longitude("Longitude:Q"),
        latitude=alt.Latitude("Latitude:Q"),
        # Every track is drawn in the same colour and the legend is hidden.
        color=(
            alt.Color("k:N")
            .scale(zero=False, range=["red"], reverse=False)
            .legend(None)
        ),
        # One line per simulated sample.
        detail="sample:N",
    )
    .add_params(select_year)  # of course we could also skip interactivity
    # Keep only the rows whose k matches the slider.
    .transform_filter(select_year)
)
map_background = alt.Chart(
    alt.topo_feature(data.world_110m.url, feature="countries")
).mark_geoshape(stroke="white", strokeWidth=2, color="lightgray")
chart = (
    alt.layer(
        map_background,
        # Low opacity lets the overlapping samples read as a density.
        storms.mark_line(opacity=0.1),
        # A trail of the measured points (size by "Maximum Wind", filtered
        # on "is_measured") was sketched here but is not enabled.
    )
    .project(translate=[1000, 480], scale=550)
    .properties(width=700, height=700)
)
chart